
Application of NAMIC SVM to example data
========================================

#conversion
foreach i ( ../SPHARM/group*_procalign.meta )
  MeshConvert $i mva/$i:t:r.mva
end

#training
svmTrain -k linear -1 mva/groupA* -2 mva/groupB* -o SVM_allAvsB_lin.svm >! lin_report.txt

#test on training data
svmClassify -s SVM_allAvsB_lin.svm -i mva/*mva -o SVM_labels_lin.txt >> lin_report.txt

This of course results in a perfect classification

more SVM_labels_lin.txt
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1

#let's check out the direction
svmDDCalculator -s SVM_allAvsB_lin.svm -n oneNorm -o SVM_DD_lin_allAvsB_ -f ascii
The DD is all 0 ?? => Error

# what happens if I leave out the normalization
svmDDCalculator -s SVM_allAvsB_lin.svm -o SVM_DD_lin_allAvsB_ -f ascii >> lin_report.txt
The DD is something that makes sense

 MeshMath SVM_DD_lin_allAvsB_0.mvh Lin_DDmagnitude.txt -magnitude -scale 20 -v >> lin_report.txt
 MeshMath SVM_DD_lin_allAvsB_0.mvh Lin_DDscaled.txt -scale 20 >> lin_report.txt


#leave one out
svmJackknife -v -k linear -1 mva/groupA*mva -2 mva/groupB*mva -j 1000 -s 2 -o Jackknife_Lin_err.txt >>! lin_report.txt
% training set size, test set size, class 1 error, class 2 error, overall error
20 1 0.487 0.327 0.407
18 3 0.435 0.342667 0.388833
16 5 0.4204 0.3536 0.387
14 7 0.403143 0.362 0.382571
12 9 0.380667 0.360556 0.370611
10 11 0.388182 0.352818 0.3705
8 13 0.366462 0.375231 0.370846
6 15 0.353667 0.386 0.369833
4 17 0.344882 0.412118 0.3785
2 19 0.350158 0.481895 0.416026
Around 10 is best (lowest overall error); if more are known -> overfitting, as the class 2 error goes up



#polynomial kernel, has to have 3 parameters: gamma, r, power
svmTrain -k polynomial --kernelParameters 1 0 2 -1 mva/groupA* -2 mva/groupB* -o SVM_allAvsB_poly.svm >! poly_report.txt
svmClassify -s SVM_allAvsB_poly.svm -i mva/*mva -o SVM_labels_poly.txt >> poly_report.txt
svmDDCalculator -s SVM_allAvsB_poly.svm -o SVM_DD_poly_allAvsB_ -f ascii >> poly_report.txt
 MeshMath SVM_DD_poly_allAvsB_0.mvh Poly_DDmagnitude.txt -magnitude -scale 20 -v >> poly_report.txt
 MeshMath SVM_DD_poly_allAvsB_0.mvh Poly_DDscaled.txt -scale 20 >> poly_report.txt
# error in DD calculator?????????

#radial basis functions
svmTrain -k rbf  --kernelParameters 0.01 -1 mva/groupA* -2 mva/groupB* -o SVM_allAvsB_rbf.svm >! rbf_report.txt
svmClassify -s SVM_allAvsB_rbf.svm -i mva/*mva -o SVM_labels_rbf.txt >> rbf_report.txt
svmDDCalculator -s SVM_allAvsB_rbf.svm -o SVM_DD_rbf_allAvsB_ -f ascii >> rbf_report.txt
 MeshMath SVM_DD_rbf_allAvsB_0.mvh Rbf_DDmagnitude.txt -magnitude -scale 20 -v >> rbf_report.txt
 MeshMath SVM_DD_rbf_allAvsB_0.mvh Rbf_DDscaled.txt -scale 20 >> rbf_report.txt


svmJackknife -v -k rbf  --kernelParameters 0.01 -1 mva/groupA*mva -2 mva/groupB*mva -j 1000 -s 2 -o Jackknife_rbf_err.txt >> rbf_report.txt
% training set size, test set size, class 1 error, class 2 error, overall error
20 1 0.772 0.131 0.4515
18 3 0.740333 0.166 0.453167
16 5 0.7082 0.2156 0.4619
14 7 0.675286 0.252857 0.464071
12 9 0.661556 0.276111 0.468833
10 11 0.635909 0.296727 0.466318
8 13 0.621769 0.328538 0.475154
6 15 0.598333 0.340333 0.469333
4 17 0.559235 0.371588 0.465412
2 19 0.469211 0.428105 0.448658
# problems with underfitting? overall error is high

svmJackknife -v -k rbf  --kernelParameters 0.001 -1 mva/groupA*mva -2 mva/groupB*mva -j 1000 -s 2 -o Jackknife_rbf_err.txt >> rbf_report.txt
% training set size, test set size, class 1 error, class 2 error, overall error
20 1 0.399 0.284 0.3415
18 3 0.363 0.302333 0.332667
16 5 0.338 0.3222 0.3301
14 7 0.344857 0.337 0.340929
12 9 0.358111 0.348667 0.353389
10 11 0.359727 0.351636 0.355682
8 13 0.363846 0.366462 0.365154
6 15 0.3746 0.396667 0.385633
4 17 0.379176 0.424471 0.401824
2 19 0.392 0.472368 0.432184
#  best so far, limited overfitting
